<?php

// Console command "app:spider": runs a phpspider crawl of qiushibaike.com.
namespace app\console;
use phpspider\core\phpspider;
use think\console\Command;
use think\console\Input;
use think\console\Output;

/**
 * Console command that builds a phpspider configuration for qiushibaike.com
 * and starts the crawl.
 *
 * The command is registered under the name "app:spider".
 */
class Spider extends Command
{
    /**
     * Declares the command's name, short description and help text.
     */
    protected function configure()
    {
        $this
            // Command name (the part after "think").
            ->setName('app:spider')

            // Short description shown when running "php think list".
            ->setDescription('爬虫走起')

            // Full description shown when the command is run with "--help".
            ->setHelp("This command allows you to spider everything")
        ;
    }

    /**
     * Builds the crawl configuration and starts the spider.
     *
     * @param Input  $input  Console input; not read by this method.
     * @param Output $output Console output; not written to by this method.
     */
    protected function execute(Input $input, Output $output)
    {
        // The two marker comments below are kept verbatim because they ask
        // not to be removed.
        /* Do NOT delete this comment */
        /* 不要删除这段注释 */
        $configs = [
            'name' => '糗事百科',
            'domains' => [
                'qiushibaike.com',
                'www.qiushibaike.com',
            ],
            'scan_urls' => [
                'http://www.qiushibaike.com/',
            ],
            // The dots in the host name are escaped so that "." only matches
            // a literal dot; unescaped, it would match any character and let
            // look-alike hosts through. Single quotes keep the backslashes
            // literal.
            'content_url_regexes' => [
                'http://www\.qiushibaike\.com/article/\d+',
            ],
            'list_url_regexes' => [
                'http://www\.qiushibaike\.com/8hr/page/\d+\?s=\d+',
            ],
            'fields' => [
                [
                    // Extract the article body from a content page.
                    'name' => "article_content",
                    'selector' => "//*[@id='single-next-link']",
                    'required' => true,
                ],
                [
                    // Extract the article author from a content page.
                    'name' => "article_author",
                    'selector' => "//div[contains(@class,'author')]//h2",
                    'required' => true,
                ],
            ],
        ];

        $spider = new phpspider($configs);
        $spider->start();
    }
}